{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "popular-price",
   "metadata": {},
   "outputs": [],
   "source": [
    "import argparse\n",
    "\n",
    "import torch.distributed as dist\n",
    "import torch.optim as optim\n",
    "import torch.optim.lr_scheduler as lr_scheduler\n",
    "\n",
    "import test  # import test.py to get mAP after each epoch\n",
    "from models import *\n",
    "from utils.datasets import *\n",
    "from utils.utils import *\n",
    "\n",
    "from mymodel import *\n",
    "\n",
    "# Checkpoint and log locations; every checkpoint file lives under the weights directory\n",
    "wdir = os.path.join('weights', '')  # weights dir, i.e. 'weights' followed by the path separator\n",
    "last, best, test_best = (wdir + fname for fname in ('last.pt', 'best.pt', 'test_best.pt'))\n",
    "results_file = 'results.txt'\n",
    "\n",
    "# Hyperparameters (results68: 59.9 mAP@0.5 yolov3-spp-416) https://github.com/ultralytics/yolov3/issues/310\n",
    "# Key order matters: values read from an optional hyp*.txt file are assigned to the keys positionally.\n",
    "hyp = dict(\n",
    "    giou=3.54,  # giou loss gain\n",
    "    cls=37.4,  # cls loss gain\n",
    "    cls_pw=1.0,  # cls BCELoss positive_weight\n",
    "    obj=64.3,  # obj loss gain (*=img_size/320 if img_size != 320)\n",
    "    obj_pw=1.0,  # obj BCELoss positive_weight\n",
    "    iou_t=0.225,  # iou training threshold\n",
    "    lr0=0.01,  # initial learning rate (SGD=5E-3, Adam=5E-4)\n",
    "    lrf=-4.,  # final LambdaLR learning rate = lr0 * (10 ** lrf)\n",
    "    momentum=0.937,  # SGD momentum\n",
    "    weight_decay=0.000484,  # optimizer weight decay\n",
    "    fl_gamma=0.5,  # focal loss gamma\n",
    "    hsv_h=0.0138,  # image HSV-Hue augmentation (fraction)\n",
    "    hsv_s=0.678,  # image HSV-Saturation augmentation (fraction)\n",
    "    hsv_v=0.36,  # image HSV-Value augmentation (fraction)\n",
    "    degrees=1.98,  # image rotation (+/- deg)\n",
    "    translate=0.05,  # image translation (+/- fraction)\n",
    "    scale=0.05,  # image scale (+/- gain)\n",
    "    shear=0.641,  # image shear (+/- deg)\n",
    ")\n",
    "\n",
    "# Overwrite hyp with hyp*.txt (optional): the first matching file wins\n",
    "hyp_files = glob.glob('hyp*.txt')\n",
    "if hyp_files:\n",
    "    print('Using %s' % hyp_files[0])\n",
    "    hyp.update(zip(list(hyp), np.loadtxt(hyp_files[0])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pleased-hamburg",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training options as (flag, add_argument keyword arguments) pairs, in declaration order\n",
    "arg_specs = [\n",
    "    ('--epochs', dict(type=int, default=5)),  # 500200 batches at bs 16, 117263 COCO images = 273 epochs\n",
    "    ('--batch-size', dict(type=int, default=16)),  # effective bs = batch_size * accumulate = 16 * 4 = 64\n",
    "    ('--accumulate', dict(type=int, default=4, help='batches to accumulate before optimizing')),\n",
    "    ('--cfg', dict(type=str, default='cfg/yolov3-tiny-1cls_1.cfg', help='*.cfg path')),\n",
    "    ('--data', dict(type=str, default='data/coco2017.data', help='*.data path')),\n",
    "    ('--multi-scale', dict(action='store_true', help='adjust (67% - 150%) img_size every 10 batches')),\n",
    "    ('--img-size', dict(nargs='+', type=int, default=[320], help='train and test image-sizes')),\n",
    "    ('--rect', dict(action='store_true', help='rectangular training')),\n",
    "    ('--resume', dict(action='store_true', help='resume training from last.pt')),\n",
    "    ('--nosave', dict(action='store_true', help='only save final checkpoint')),\n",
    "    ('--notest', dict(action='store_true', help='only test final epoch')),\n",
    "    ('--evolve', dict(action='store_true', help='evolve hyperparameters')),\n",
    "    ('--bucket', dict(type=str, default='', help='gsutil bucket')),\n",
    "    ('--cache-images', dict(action='store_true', help='cache images for faster training')),\n",
    "    ('--weights', dict(type=str, default='', help='initial weights path')),\n",
    "    ('--arc', dict(type=str, default='default', help='yolo architecture')),  # default, uCE, uBCE\n",
    "    ('--name', dict(default='', help='renames results.txt to results_name.txt if supplied')),\n",
    "    ('--device', dict(default='1', help='device id (i.e. 0 or 0,1 or cpu)')),\n",
    "    ('--adam', dict(action='store_true', help='use adam optimizer')),\n",
    "    ('--single-cls', dict(action='store_true', help='train as single-class dataset')),\n",
    "    ('--var', dict(type=float, help='debug variable')),\n",
    "]\n",
    "parser = argparse.ArgumentParser()\n",
    "for flag, kwargs in arg_specs:\n",
    "    parser.add_argument(flag, **kwargs)\n",
    "\n",
    "# parse_known_args returns (namespace, unrecognized arguments); only the namespace is kept\n",
    "opt = parser.parse_known_args()[0]\n",
    "if opt.resume:\n",
    "    opt.weights = last  # resuming always restarts from the last checkpoint\n",
    "print(opt)\n",
    "print(opt.weights)\n",
    "device = torch_utils.select_device(opt.device, batch_size=opt.batch_size)\n",
    "print(device)\n",
    "\n",
    "# Optional TensorBoard logging: tb_writer stays None when evolving or when TensorBoard is unusable\n",
    "tb_writer = None\n",
    "if not opt.evolve:  # Train normally\n",
    "    try:\n",
    "        # Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/\n",
    "        from torch.utils.tensorboard import SummaryWriter\n",
    "\n",
    "        tb_writer = SummaryWriter()\n",
    "    except Exception as e:\n",
    "        # Still best-effort, but say why logging is off; KeyboardInterrupt/SystemExit now propagate\n",
    "        print('WARNING: TensorBoard logging disabled (%s: %s)' % (type(e).__name__, e))\n",
    "\n",
    "# Short aliases for the parsed options used throughout the notebook\n",
    "cfg, data = opt.cfg, opt.data\n",
    "# Two --img-size values mean (train, test); otherwise the given list is doubled and unpacked\n",
    "if len(opt.img_size) == 2:\n",
    "    img_size, img_size_test = opt.img_size\n",
    "else:\n",
    "    img_size, img_size_test = opt.img_size * 2\n",
    "epochs = opt.epochs  # 500200 batches at bs 64, 117263 images = 273 epochs\n",
    "batch_size = opt.batch_size\n",
    "accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64\n",
    "weights = opt.weights  # initial training weights\n",
    "\n",
    "# Initialize\n",
    "init_seeds()\n",
    "if opt.multi_scale:\n",
    "    grid_cells = img_size / 32  # image size expressed in 32-pixel units\n",
    "    img_sz_min = round(grid_cells / 1.5)\n",
    "    img_sz_max = round(grid_cells * 1.5)\n",
    "    img_size = 32 * img_sz_max  # initiate with maximum multi_scale size\n",
    "    print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))\n",
    "\n",
    "# Configure run: dataset paths and class count are set here (the parse_data_cfg(data) call is disabled)\n",
    "train_path = '../DAC-SDC2021/dataset/data_training'\n",
    "test_path = '../DAC-SDC2021/dataset/sample'\n",
    "nc = 1\n",
    "\n",
    "# Remove previous results\n",
    "stale_files = glob.glob('*_batch*.png') + glob.glob(results_file)\n",
    "for stale_path in stale_files:\n",
    "    os.remove(stale_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "coral-stereo",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instantiate the network, then move it to the selected device\n",
    "model = UltraNet_Bypass()\n",
    "model = model.to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "intermediate-aspect",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the model parameters into three optimizer groups, selected by parameter name\n",
    "pg0, pg1, pg2 = [], [], []  # optimizer parameter groups\n",
    "for param_name, param in model.named_parameters():\n",
    "    if '.bias' in param_name:\n",
    "        group = pg2  # biases\n",
    "    elif 'Conv2d.weight' in param_name:\n",
    "        group = pg1  # apply weight_decay\n",
    "    else:\n",
    "        group = pg0  # all else\n",
    "    group.append(param)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "incident-necessity",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the optimizer on the 'all else' group, then register the weight-decay and bias groups\n",
    "base_lr = hyp['lr0']\n",
    "if not opt.adam:\n",
    "    optimizer = optim.SGD(pg0, lr=base_lr, momentum=hyp['momentum'], nesterov=True)\n",
    "else:\n",
    "    # hyp['lr0'] *= 0.1  # reduce lr (i.e. SGD=5E-3, Adam=5E-4)\n",
    "    optimizer = optim.Adam(pg0, lr=base_lr)\n",
    "optimizer.add_param_group(dict(params=pg1, weight_decay=hyp['weight_decay']))  # add pg1 with weight_decay\n",
    "optimizer.add_param_group(dict(params=pg2))  # add pg2 (biases)\n",
    "bias_group = optimizer.param_groups[2]\n",
    "bias_group['lr'] = 2.0 * bias_group['lr']  # bias lr\n",
    "del pg0, pg1, pg2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "continental-possible",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initial training state; optionally restore model/optimizer/results from --weights\n",
    "start_epoch = 0\n",
    "best_fitness = 0.0\n",
    "test_best_iou = 0.0\n",
    "if weights.endswith('.pt'):  # pytorch format\n",
    "    # possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.\n",
    "    print(\"load yolo-weights...\")\n",
    "    chkpt = torch.load(weights, map_location=device)\n",
    "\n",
    "    # load model\n",
    "    try:\n",
    "        # Keep only checkpoint tensors whose element count matches the model tensor of the\n",
    "        # same name; a checkpoint key the model lacks raises KeyError, reported below\n",
    "        model_state = model.state_dict()  # built once rather than once per checkpoint key\n",
    "        chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model_state[k].numel() == v.numel()}\n",
    "        # Load the filtered weights. Passing the whole checkpoint dict here would match no\n",
    "        # parameter names and, with strict=False, silently leave the model untouched.\n",
    "        model.load_state_dict(chkpt['model'], strict=False)\n",
    "        print(\"successfully load model\")\n",
    "    except KeyError as e:\n",
    "        s = \"%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. \" % (opt.weights, opt.cfg, opt.weights)\n",
    "        raise KeyError(s) from e\n",
    "\n",
    "    # load optimizer (checkpoints written after the final epoch store None here)\n",
    "    if chkpt['optimizer'] is not None:\n",
    "        optimizer.load_state_dict(chkpt['optimizer'])\n",
    "        best_fitness = chkpt['best_fitness']\n",
    "        print(\"successfully load optimizer\")\n",
    "\n",
    "    # load results\n",
    "    if chkpt.get('training_results') is not None:\n",
    "        with open(results_file, 'w') as file:\n",
    "            file.write(chkpt['training_results'])  # write results.txt\n",
    "        print(\"successfully load results\")\n",
    "        print(chkpt['training_results'])\n",
    "\n",
    "    # start_epoch stays 0: restoring the checkpoint epoch is disabled\n",
    "    #start_epoch = chkpt['epoch'] + 1\n",
    "    del chkpt\n",
    "\n",
    "elif len(weights) > 0:  # darknet format\n",
    "    # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.\n",
    "    load_darknet_weights(model, weights)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "elect-elephant",
   "metadata": {},
   "outputs": [],
   "source": [
    "def lf(x):\n",
    "    # Learning-rate multiplier for epoch x: 1.0 at x = 0, falling to 0.01 at x = epochs\n",
    "    return (1 + math.cos(x * math.pi / epochs)) / 2 * 0.99 + 0.01  # cosine https://arxiv.org/pdf/1812.01187.pdf\n",
    "\n",
    "\n",
    "scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n",
    "scheduler.last_epoch = start_epoch\n",
    "\n",
    "# Training dataset (augmented)\n",
    "dataset = LoadImagesAndLabels(\n",
    "    train_path, img_size, batch_size,\n",
    "    augment=True,\n",
    "    hyp=hyp,  # augmentation hyperparameters\n",
    "    rect=opt.rect,  # rectangular training\n",
    "    cache_images=opt.cache_images,\n",
    "    single_cls=opt.single_cls,\n",
    ")\n",
    "\n",
    "# Never ask for a batch larger than the dataset itself\n",
    "batch_size = min(batch_size, len(dataset))\n",
    "worker_cap = batch_size if batch_size > 1 else 0\n",
    "nw = min(os.cpu_count(), worker_cap, 8)  # number of workers\n",
    "\n",
    "# Settings shared by the train and test loaders\n",
    "loader_kwargs = dict(num_workers=nw, pin_memory=True, collate_fn=dataset.collate_fn)\n",
    "\n",
    "dataloader = torch.utils.data.DataLoader(\n",
    "    dataset,\n",
    "    batch_size=batch_size,\n",
    "    shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used\n",
    "    **loader_kwargs,\n",
    ")\n",
    "\n",
    "# Test dataset and loader use twice the training batch size\n",
    "test_dataset = LoadImagesAndLabels(\n",
    "    test_path, img_size_test, batch_size * 2,\n",
    "    hyp=hyp,\n",
    "    rect=False,\n",
    "    cache_images=opt.cache_images,\n",
    "    single_cls=opt.single_cls,\n",
    ")\n",
    "testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size * 2, **loader_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "italian-ability",
   "metadata": {},
   "outputs": [],
   "source": [
    "nb = len(dataloader)  # number of batches per epoch\n",
    "prebias = (start_epoch == 0)  # the prebias stage only runs when starting from epoch 0\n",
    "\n",
    "# Attach number of classes, yolo architecture and hyperparameters to the model\n",
    "for attr_name, attr_value in (('nc', nc), ('arc', opt.arc), ('hyp', hyp)):\n",
    "    setattr(model, attr_name, attr_value)\n",
    "model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights\n",
    "\n",
    "maps = np.zeros(nc)  # mAP per class\n",
    "# torch.autograd.set_detect_anomaly(True)\n",
    "results = (0,) * 7  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'\n",
    "t0 = time.time()\n",
    "torch_utils.model_info(model, report='summary')  # 'full' or 'summary'\n",
    "print('Using %g dataloader workers' % nw)\n",
    "print('Starting training for %g epochs...' % epochs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "decent-external",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Main training loop: per epoch, train over every batch, then evaluate, log and checkpoint\n",
    "diverged = False  # becomes True when a non-finite loss forces training to stop\n",
    "for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------\n",
    "    model.train()\n",
    "    model.gr = 1 - (1 + math.cos(min(epoch * 2, epochs) * math.pi / epochs)) / 2  # GIoU <-> 1.0 loss ratio\n",
    "\n",
    "    # Prebias: interpolate the bias group's lr/momentum from (0.1, 0.9) to their regular values over ne epochs\n",
    "    if prebias:\n",
    "        ne = max(round(30 / nb), 3)  # number of prebias epochs\n",
    "        ps = (np.interp(epoch, [0, ne], [0.1, hyp['lr0'] * 2]),\n",
    "              np.interp(epoch, [0, ne], [0.9, hyp['momentum']]))  # prebias settings (lr=0.1, momentum=0.9)\n",
    "        if epoch == ne:\n",
    "            # print_model_biases(model)\n",
    "            prebias = False\n",
    "\n",
    "        # Bias optimizer settings\n",
    "        optimizer.param_groups[2]['lr'] = ps[0]\n",
    "        if optimizer.param_groups[2].get('momentum') is not None:  # for SGD but not Adam\n",
    "            optimizer.param_groups[2]['momentum'] = ps[1]\n",
    "\n",
    "    mloss = torch.zeros(4).to(device)  # mean losses\n",
    "    print(('\\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))\n",
    "    pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar\n",
    "    for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------\n",
    "        ni = i + nb * epoch  # number integrated batches (since train start)\n",
    "        imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0\n",
    "        targets = targets.to(device)\n",
    "\n",
    "        # Disabled: plot the first training batch\n",
    "        # if ni < 1:\n",
    "        #     f = 'train_batch%g.png' % i  # filename\n",
    "        #     plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)\n",
    "        #     if tb_writer:\n",
    "        #         tb_writer.add_image(f, cv2.imread(f)[:, :, ::-1], dataformats='HWC')\n",
    "\n",
    "        # Multi-Scale training\n",
    "        if opt.multi_scale:\n",
    "            if ni / accumulate % 1 == 0:  # adjust img_size (67% - 150%) whenever ni is a multiple of accumulate\n",
    "                img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32\n",
    "            sf = img_size / max(imgs.shape[2:])  # scale factor\n",
    "            if sf != 1:\n",
    "                ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)\n",
    "                imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)\n",
    "\n",
    "        # Run model\n",
    "        pred = model(imgs)\n",
    "\n",
    "        # Compute loss\n",
    "        loss, loss_items = compute_loss(pred, targets, model)\n",
    "        if not torch.isfinite(loss):\n",
    "            # Stop here: back-propagating a NaN/Inf loss would corrupt the weights\n",
    "            print('WARNING: non-finite loss, ending training ', loss_items)\n",
    "            diverged = True\n",
    "            break\n",
    "\n",
    "        # Scale loss by nominal batch_size of 64\n",
    "        loss *= batch_size / 64\n",
    "\n",
    "        loss.backward()\n",
    "\n",
    "        # Optimize accumulated gradient\n",
    "        if ni % accumulate == 0:\n",
    "            optimizer.step()\n",
    "            optimizer.zero_grad()\n",
    "\n",
    "        # Print batch results\n",
    "        mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses\n",
    "        mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)\n",
    "        s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, len(targets), img_size)\n",
    "        pbar.set_description(s)\n",
    "\n",
    "    if diverged:\n",
    "        # Leave the epoch loop too; evaluation and checkpointing are skipped for this epoch\n",
    "        break\n",
    "\n",
    "    scheduler.step()\n",
    "    final_epoch = epoch + 1 == epochs\n",
    "    if not opt.notest or final_epoch:  # Calculate mAP\n",
    "        is_coco = any([x in data for x in ['coco.data', 'coco2014.data', 'coco2017.data']]) and model.nc == 80\n",
    "        results = test.test(cfg,\n",
    "                            data,\n",
    "                            batch_size=batch_size * 2,\n",
    "                            img_size=img_size_test,\n",
    "                            model=model,\n",
    "                            conf_thres=0.001,  # 0.001 if opt.evolve or (final_epoch and is_coco) else 0.01,\n",
    "                            iou_thres=0.6,\n",
    "                            save_json=final_epoch and is_coco,\n",
    "                            single_cls=opt.single_cls,\n",
    "                            dataloader=testloader)\n",
    "\n",
    "    # Append this epoch's progress line and test results to the results file\n",
    "    with open(results_file, 'a') as f:\n",
    "        f.write(s + '%10.3g' * len(results) % results + '\\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)\n",
    "    if len(opt.name) and opt.bucket:\n",
    "        os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (opt.bucket, opt.name))\n",
    "\n",
    "    # Tensorboard: zip() pairs values with titles, so only the first len(titles) values are logged\n",
    "    if tb_writer:\n",
    "        x = list(mloss) + list(results)\n",
    "        titles = ['GIoU', 'Objectness', 'Classification', 'Train loss',\n",
    "                  'iou', 'loss', 'Giou loss', 'obj loss']\n",
    "        for xi, title in zip(x, titles):\n",
    "            tb_writer.add_scalar(title, xi, epoch)\n",
    "\n",
    "    # Update best fitness\n",
    "    fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]\n",
    "    if fi > best_fitness:\n",
    "        best_fitness = fi\n",
    "\n",
    "    # Track the best value of the first test result separately\n",
    "    test_iou = results[0]\n",
    "    if test_iou > test_best_iou:\n",
    "        test_best_iou = test_iou\n",
    "\n",
    "    # Save training results\n",
    "    save = (not opt.nosave) or (final_epoch and not opt.evolve)\n",
    "\n",
    "    if save:\n",
    "        with open(results_file, 'r') as f:\n",
    "            # Create checkpoint\n",
    "            chkpt = {'epoch': epoch,\n",
    "                     'best_fitness': best_fitness,\n",
    "                     'training_results': f.read(),\n",
    "                     'model': model.module.state_dict() if type(\n",
    "                             model) is nn.parallel.DistributedDataParallel else model.state_dict(),\n",
    "                     'optimizer': None if final_epoch else optimizer.state_dict()}\n",
    "\n",
    "        # Save last checkpoint\n",
    "        torch.save(chkpt, last)\n",
    "\n",
    "        # Save best checkpoint\n",
    "        if best_fitness == fi:\n",
    "            torch.save(chkpt, best)\n",
    "\n",
    "        if test_iou == test_best_iou:\n",
    "            torch.save(chkpt, test_best)\n",
    "\n",
    "        # Save backup every 10 epochs (optional)\n",
    "        # if epoch > 0 and epoch % 10 == 0:\n",
    "        #     torch.save(chkpt, wdir + 'backup%g.pt' % epoch)\n",
    "\n",
    "        # Delete checkpoint\n",
    "        del chkpt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "infectious-cover",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optionally rename the results/checkpoint files after the run name and upload them\n",
    "n = opt.name\n",
    "if len(n):\n",
    "    n = '_' + n if not n.isnumeric() else n\n",
    "    fresults, flast, fbest = 'results%s.txt' % n, 'last%s.pt' % n, 'best%s.pt' % n\n",
    "    os.rename('results.txt', fresults)\n",
    "    if os.path.exists(wdir + 'last.pt'):\n",
    "        os.rename(wdir + 'last.pt', wdir + flast)\n",
    "    if os.path.exists(wdir + 'best.pt'):\n",
    "        os.rename(wdir + 'best.pt', wdir + fbest)\n",
    "    if opt.bucket:  # save to cloud\n",
    "        os.system('gsutil cp %s gs://%s/results' % (fresults, opt.bucket))\n",
    "        os.system('gsutil cp %s gs://%s/weights' % (wdir + flast, opt.bucket))\n",
    "        # os.system('gsutil cp %s gs://%s/weights' % (wdir + fbest, opt.bucket))\n",
    "\n",
    "#if not opt.evolve:\n",
    "#    plot_results()  # save as results.png\n",
    "print('%g epochs completed in %.3f hours.\\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))\n",
    "# Tear down the distributed process group only if one was actually initialized; calling\n",
    "# destroy_process_group() without an initialized group raises on multi-GPU machines\n",
    "if torch.cuda.device_count() > 1 and dist.is_available() and dist.is_initialized():\n",
    "    dist.destroy_process_group()\n",
    "torch.cuda.empty_cache()\n",
    "\n",
    "print(results)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
